winsafe\kernel\utilities/
w_string.rs

1use std::cmp::Ordering;
2
3use crate::co;
4use crate::decl::*;
5use crate::guard::*;
6use crate::kernel::ffi;
7use crate::prelude::*;
8
/// Stores a `[u16]` buffer for a null-terminated
/// [Unicode UTF-16](https://learn.microsoft.com/en-us/windows/win32/intl/unicode-in-the-windows-api)
/// wide string natively used by Windows.
///
/// Uses
/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html)
/// technique for faster performance.
///
/// This struct is mostly used internally by the library, as a bridge between
/// Windows and Rust strings.
#[derive(Default, Clone)]
pub struct WString {
	// Underlying storage; every method of `WString` forwards to it.
	buf: Buffer,
}
23
24impl std::fmt::Display for WString {
25	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26		let txt = match self.buf.to_string_checked() {
27			Ok(t) => t,
28			Err(e) => format!("PARSING ERROR: {}", e.to_string()),
29		};
30		std::fmt::Display::fmt(&txt, f)
31	}
32}
33impl std::fmt::Debug for WString {
34	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35		std::fmt::Debug::fmt(&self.buf, f)
36	}
37}
38
39impl std::cmp::PartialEq for WString {
40	fn eq(&self, other: &Self) -> bool {
41		self.cmp(other) == Ordering::Equal
42	}
43}
// Marker impl only: `PartialEq::eq` is built on `Ord::cmp`, so there is
// nothing to implement here.
impl std::cmp::Eq for WString {}
45
46impl std::cmp::PartialOrd for WString {
47	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
48		let ord = unsafe { ffi::lstrcmpW(self.as_ptr(), other.as_ptr()) };
49		Some(if ord < 0 {
50			Ordering::Less
51		} else if ord > 0 {
52			Ordering::Greater
53		} else {
54			Ordering::Equal
55		})
56	}
57}
58impl std::cmp::Ord for WString {
59	fn cmp(&self, other: &Self) -> Ordering {
60		self.partial_cmp(other).unwrap()
61	}
62}
63
64impl WString {
65	/// Stack size for internal
66	/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html).
67	pub const SSO_LEN: usize = Buffer::SSO_LEN;
68
69	/// Stores an UTF-16 null-terminated string from an optional [`&str`](str).
70	///
71	/// If `s` is `None` or the string is empty, no allocation is made.
72	#[must_use]
73	pub fn from_opt_str(s: Option<impl AsRef<str>>) -> Self {
74		Self { buf: Buffer::from_opt_str(s) }
75	}
76
77	/// Stores an UTF-16 null-terminated string from a [`&str`](str).
78	///
79	/// If the string is empty, no allocation is made.
80	#[must_use]
81	pub fn from_str(s: impl AsRef<str>) -> Self {
82		Self { buf: Buffer::from_str(s, ForceHeap::No) }
83	}
84
85	/// Stores an UTF-16 null-terminated string from a [`&str`](str), bypassing
86	/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html)
87	/// – that is, forcing the internal allocation on the heap. This should be
88	/// rarely needed.
89	///
90	/// If the string is empty, no allocation is made.
91	#[must_use]
92	pub fn from_str_force_heap(s: impl AsRef<str>) -> Self {
93		Self { buf: Buffer::from_str(s, ForceHeap::Yes) }
94	}
95
96	/// Stores a series of UTF-16 null-terminated strings. The buffer will end
97	/// with two terminating nulls – that means further retrieval operations
98	/// will "see" only the first string.
99	///
100	/// This method can be used as an escape hatch to interoperate with other
101	/// libraries.
102	#[must_use]
103	pub fn from_str_vec(v: &[impl AsRef<str>]) -> Self {
104		Self { buf: Buffer::from_str_vec(v) }
105	}
106
107	/// Stores an UTF-16 null-terminated string by copying from a buffer,
108	/// specifying the number of chars to be copied.
109	///
110	/// The `src` buffer doesn't need to be null-terminated.
111	#[must_use]
112	pub fn from_wchars_count(src: *const u16, num_chars: usize) -> Self {
113		Self {
114			buf: Buffer::from_wchars_count(src, num_chars),
115		}
116	}
117
118	/// Stores an UTF-16 null-terminated string by copying from a
119	/// null-terminated buffer. The string length is retrieved with
120	/// [`lstrlen`](https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-lstrlenw).
121	///
122	/// # Safety
123	///
124	/// Be sure the string is null-terminated, otherwise `lstrlen` will get
125	/// lost, possibly reading an invalid memory location.
126	#[must_use]
127	pub unsafe fn from_wchars_nullt(src: *const u16) -> Self {
128		Self { buf: Buffer::from_wchars_nullt(src) }
129	}
130
131	/// Stores an UTF-16 null-terminated string by copying from a slice.
132	///
133	/// The `src` slice doesn't need to be null-terminated.
134	#[must_use]
135	pub fn from_wchars_slice(src: &[u16]) -> Self {
136		Self { buf: Buffer::from_wchars_slice(src) }
137	}
138
139	/// Creates a new, empty `WString`. No allocation is made.
140	#[must_use]
141	pub const fn new() -> Self {
142		Self { buf: Buffer::new() }
143	}
144
145	/// Allocates an UTF-16 buffer with an specific length. All elements will be
146	/// set to zero.
147	#[must_use]
148	pub fn new_alloc_buf(sz: usize) -> Self {
149		Self {
150			buf: Buffer::new_alloc_buf(sz, ForceHeap::No),
151		}
152	}
153
154	/// Returns a mutable
155	/// [`LPWSTR`](https://learn.microsoft.com/en-us/windows/win32/learnwin32/working-with-strings)
156	/// pointer to the internal UTF-16 string buffer, to be passed to native
157	/// Win32 functions. This is useful to receive strings.
158	///
159	/// # Panics
160	///
161	/// Panics if the buffer was not allocated.
162	///
163	/// # Safety
164	///
165	/// Be sure to alloc enough room, otherwise a buffer overrun may occur.
166	#[must_use]
167	pub unsafe fn as_mut_ptr(&mut self) -> *mut u16 {
168		self.buf.as_mut_ptr()
169	}
170
171	/// Returns a mutable slice to the internal UTF-16 string buffer.
172	#[must_use]
173	pub fn as_mut_slice(&mut self) -> &mut [u16] {
174		self.buf.as_mut_slice()
175	}
176
177	/// Returns a
178	/// [`LPCWSTR`](https://learn.microsoft.com/en-us/windows/win32/learnwin32/working-with-strings)
179	/// pointer to the internal UTF-16 string buffer, to be passed to native
180	/// Win32 functions.
181	///
182	/// If the buffer was not allocated, returns a null pointer.
183	#[must_use]
184	pub fn as_ptr(&self) -> *const u16 {
185		self.buf.as_ptr()
186	}
187
188	/// Returns a slice to the internal UTF-16 string buffer.
189	#[must_use]
190	pub fn as_slice(&self) -> &[u16] {
191		self.buf.as_slice()
192	}
193
194	/// Returns the size of the allocated internal buffer, in `u16` wide chars.
195	/// Note that the terminating null, if existing, is also counted.
196	///
197	/// If the buffer was not allocated yet, returns zero.
198	#[must_use]
199	pub const fn buf_len(&self) -> usize {
200		self.buf.buf_len()
201	}
202
203	/// Copies the content into an external buffer. A terminating null will be
204	/// appended.
205	///
206	/// If `dest` is smaller, the string will be truncated.
207	///
208	/// If `dest` has 1 element, it will receive only the terminating null.
209	pub fn copy_to_slice(&self, dest: &mut [u16]) {
210		if !dest.is_empty() {
211			let usable_len = dest.len() - 1; // leave room for terminating null
212			self.as_slice()
213				.iter()
214				.zip(dest[..usable_len].iter_mut())
215				.for_each(|(src, dest)| *dest = *src);
216			dest[usable_len..]
217				.iter_mut()
218				.for_each(|dest| *dest = 0x0000); // fill the rest with zero
219		}
220	}
221
222	/// Fills the entire buffer with zeros.
223	pub fn fill_with_zero(&mut self) {
224		self.as_mut_slice().iter_mut().for_each(|ch| *ch = 0x0000);
225	}
226
227	/// Returns `true` if the internal buffer has been allocated.
228	#[must_use]
229	pub const fn is_allocated(&self) -> bool {
230		self.buf.is_allocated()
231	}
232
233	/// Converts into [`String`](std::string::String) by calling
234	/// [`String::from_utf16`](std::string::String::from_utf16). An uncallocated
235	/// will simply be converted into an empty string.
236	///
237	/// This method is useful if you're parsing raw data which may contain
238	/// invalid characters. If you're dealing with a string known to be valid,
239	/// [`to_string`](std::string::ToString::to_string) is more practical.
240	#[must_use]
241	pub fn to_string_checked(&self) -> Result<String, std::string::FromUtf16Error> {
242		self.buf.to_string_checked()
243	}
244
245	/// Wrapper to
246	/// [`lstrlen`](https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-lstrlenw).
247	///
248	/// Returns the number of [`u16`] characters stored in the internal buffer,
249	/// not counting the terminating null.
250	#[must_use]
251	pub fn str_len(&self) -> usize {
252		unsafe { ffi::lstrlenW(self.buf.as_ptr()) as _ }
253	}
254
255	/// Converts the string to lower case, in-place. Wrapper to
256	/// [`CharLower`](https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-charlowerw).
257	pub fn make_lowercase(&mut self) {
258		unsafe {
259			ffi::CharLowerW(self.as_mut_ptr());
260		}
261	}
262
263	/// Converts the string to upper case, in-place. Wrapper to
264	/// [`CharUpper`](https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-charupperw).
265	pub fn make_uppercase(&mut self) {
266		unsafe {
267			ffi::CharUpperW(self.as_mut_ptr());
268		}
269	}
270
271	/// Guesses the encoding with [`Encoding::guess`](crate::Encoding::guess)
272	/// and parses the data as a string.
273	///
274	/// If you're sure the data has UTF-8 encoding, you can also use the
275	/// built-in [`String::from_utf8`](std::string::String::from_utf8).
276	///
277	/// To serialize the string back into UTF-8 bytes, use the built-in
278	/// [`String::into_bytes`](std::string::String::into_bytes).
279	///
280	/// # Examples
281	///
282	/// Usually the fastest way to read the text from a file is by mapping its
283	/// contents in memory with [`FileMapped`](crate::FileMapped), then parsing:
284	///
285	/// ```no_run
286	/// use winsafe::{self as w, prelude::*};
287	///
288	/// let file_in = w::FileMapped::open(
289	///     "C:\\Temp\\foo.txt",
290	///     w::FileAccess::ExistingReadOnly,
291	/// )?;
292	/// let wstr = w::WString::parse(file_in.as_slice())?;
293	/// let str_contents = wstr.to_string();
294	/// # w::SysResult::Ok(())
295	/// ```
296	#[must_use]
297	pub fn parse(data: &[u8]) -> SysResult<Self> {
298		let mut data = data;
299		if data.is_empty() {
300			return Ok(Self::new()); // nothing to parse
301		}
302
303		let (encoding, sz_bom) = Encoding::guess(data);
304		data = &data[sz_bom..]; // skip BOM, if any
305
306		Ok(Self::from_wchars_slice(&match encoding {
307			Encoding::Ansi => Self::parse_ansi(data),
308			Encoding::Win1252 => MultiByteToWideChar(co::CP::WINDOWS_1252, co::MBC::NoValue, data)?,
309			Encoding::Utf8 => MultiByteToWideChar(co::CP::UTF8, co::MBC::NoValue, data)?,
310			Encoding::Utf16be => Self::parse_utf16(data, true),
311			Encoding::Utf16le => Self::parse_utf16(data, false),
312			Encoding::Utf32be
313			| Encoding::Utf32le
314			| Encoding::Scsu
315			| Encoding::Bocu1
316			| Encoding::Unknown => panic!("Encoding {} not implemented.", encoding),
317		}))
318	}
319
320	fn parse_ansi(data: &[u8]) -> Vec<u16> {
321		data.iter()
322			.take_while(|ch| **ch != 0x0000) // ignore terminating null, if any
323			.map(|ch| *ch as u16) // raw u8 to u16 conversion
324			.collect()
325	}
326
327	fn parse_utf16(data: &[u8], is_big_endian: bool) -> Vec<u16> {
328		let data = if data.len() % 2 == 1 {
329			&data[..data.len() - 1] // if odd number of bytes, discard last one
330		} else {
331			data
332		};
333
334		data.chunks(2)
335			.take_while(|ch2| **ch2 != [0x00, 0x00]) // ignore terminating null, if any
336			.map(|ch2| {
337				if is_big_endian {
338					u16::from_be_bytes(ch2.try_into().unwrap())
339				} else {
340					u16::from_le_bytes(ch2.try_into().unwrap())
341				}
342			})
343			.collect()
344	}
345}
346
/// Tells the buffer constructors whether the inline (stack) storage may be
/// used, or whether a heap allocation must be made regardless of the length.
#[derive(PartialEq, Eq)]
enum ForceHeap {
	/// Always allocate on the heap.
	Yes,
	/// Use the inline storage whenever the requested length fits in it.
	No,
}
352
/// Internal storage of a `WString`.
enum Buffer {
	/// Inline array of fixed `SSO_LEN` size, used for short strings.
	Stack([u16; Self::SSO_LEN]),
	/// Heap block: its size, plus the guard returned by
	/// `HGLOBAL::GlobalAlloc`.
	Heap(usize, GlobalFreeGuard), // keep memory size in bytes
	/// No storage at all; this is what empty strings use.
	Unallocated,
}
358
359impl Default for Buffer {
360	fn default() -> Self {
361		Self::Unallocated
362	}
363}
364
365impl Clone for Buffer {
366	fn clone(&self) -> Self {
367		match self {
368			Self::Unallocated => Self::Unallocated,
369			_ => {
370				let mut new_self = Self::new_alloc_buf(self.buf_len(), ForceHeap::No);
371				self.as_slice()
372					.iter()
373					.zip(new_self.as_mut_slice())
374					.for_each(|(src, dest)| *dest = *src);
375				new_self
376			},
377		}
378	}
379}
380
381impl std::fmt::Debug for Buffer {
382	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
383		let txt = match self.to_string_checked() {
384			Ok(t) => t,
385			Err(e) => format!("PARSING ERROR: {}", e.to_string()),
386		};
387		write!(
388			f,
389			"{}",
390			match self {
391				Self::Stack(_) => format!("STACK({}) \"{}\"", self.buf_len(), txt),
392				Self::Heap(_, _) => format!("HEAP({}) \"{}\"", self.buf_len(), txt),
393				Self::Unallocated => "UNALLOCATED \"\"".to_owned(),
394			}
395		)
396	}
397}
398
399impl Buffer {
400	pub const SSO_LEN: usize = 20;
401
402	#[must_use]
403	fn from_opt_str(s: Option<impl AsRef<str>>) -> Self {
404		match s {
405			Some(s) => Self::from_str(s, ForceHeap::No),
406			None => Self::Unallocated,
407		}
408	}
409
410	#[must_use]
411	fn from_str(s: impl AsRef<str>, force_heap: ForceHeap) -> Self {
412		let s_len = s.as_ref().encode_utf16().count();
413		if s_len == 0 {
414			Self::Unallocated
415		} else {
416			let num_chars = s_len + 1; // room for terminating null
417			let mut new_self = Self::new_alloc_buf(num_chars, force_heap);
418			s.as_ref()
419				.encode_utf16()
420				.zip(new_self.as_mut_slice())
421				.for_each(|(src, dest)| *dest = src);
422			new_self
423		}
424	}
425
426	#[must_use]
427	fn from_str_vec(v: &[impl AsRef<str>]) -> Self {
428		let tot_chars = v.iter() // number of chars of all strings, including terminating nulls
429			.fold(0, |tot, s| tot + s.as_ref().chars().count() + 1) // include terminating null
430			+ 1; // double terminating null
431		let mut new_self = Self::new_alloc_buf(tot_chars, ForceHeap::No);
432		v.iter()
433			.map(|s| {
434				s.as_ref().encode_utf16().chain(std::iter::once(0x0000)) // append terminating null on each string
435			})
436			.flatten()
437			.zip(new_self.as_mut_slice())
438			.for_each(|(src, dest)| *dest = src);
439		new_self
440	}
441
442	#[must_use]
443	fn from_wchars_count(src: *const u16, num_chars: usize) -> Self {
444		if src.is_null() || num_chars == 0 {
445			Self::Unallocated
446		} else {
447			Self::from_wchars_slice(unsafe { std::slice::from_raw_parts(src, num_chars) })
448		}
449	}
450
451	#[must_use]
452	unsafe fn from_wchars_nullt(src: *const u16) -> Self {
453		Self::from_wchars_count(src, unsafe { ffi::lstrlenW(src) as _ })
454	}
455
456	#[must_use]
457	fn from_wchars_slice(src: &[u16]) -> Self {
458		if src.is_empty() {
459			Self::Unallocated
460		} else {
461			let num_chars = src
462				.iter()
463				.take_while(|ch| **ch != 0x0000) // skip terminating null, if any
464				.count() + 1; // room for terminating null
465			let mut new_self = Self::new_alloc_buf(num_chars, ForceHeap::No);
466			src.iter()
467				.take_while(|ch| **ch != 0x0000) // skip terminating null, if any
468				.zip(new_self.as_mut_slice())
469				.for_each(|(src, dest)| *dest = *src);
470			new_self
471		}
472	}
473
474	#[must_use]
475	const fn new() -> Self {
476		Self::Unallocated
477	}
478
479	#[must_use]
480	fn new_alloc_buf(num_chars: usize, force_heap: ForceHeap) -> Self {
481		if num_chars == 0 {
482			Self::Unallocated
483		} else if force_heap == ForceHeap::Yes || num_chars > Self::SSO_LEN {
484			Self::Heap(
485				num_chars * std::mem::size_of::<u16>(),
486				HGLOBAL::GlobalAlloc(
487					co::GMEM::FIXED | co::GMEM::ZEROINIT,
488					num_chars * std::mem::size_of::<u16>(),
489				)
490				.unwrap(), // assume no allocation errors
491			)
492		} else {
493			Self::Stack([0x0000; Self::SSO_LEN])
494		}
495	}
496
497	#[must_use]
498	unsafe fn as_mut_ptr(&mut self) -> *mut u16 {
499		match self {
500			Self::Stack(arr) => arr.as_mut_ptr(),
501			Self::Heap(_, ptr) => ptr.ptr() as _,
502			Self::Unallocated => panic!("Trying to use an unallocated WString buffer."),
503		}
504	}
505
506	#[must_use]
507	fn as_mut_slice(&mut self) -> &mut [u16] {
508		match self {
509			Self::Stack(arr) => arr,
510			Self::Heap(_, ptr) => unsafe {
511				std::slice::from_raw_parts_mut(ptr.ptr() as _, self.buf_len())
512			},
513			Self::Unallocated => &mut [],
514		}
515	}
516
517	#[must_use]
518	fn as_ptr(&self) -> *const u16 {
519		match self {
520			Self::Stack(arr) => arr.as_ptr(),
521			Self::Heap(_, ptr) => ptr.ptr() as _,
522			Self::Unallocated => std::ptr::null(),
523		}
524	}
525
526	#[must_use]
527	fn as_slice(&self) -> &[u16] {
528		match self {
529			Self::Stack(arr) => arr,
530			Self::Heap(_, ptr) => unsafe {
531				std::slice::from_raw_parts(ptr.ptr() as _, self.buf_len())
532			},
533			Self::Unallocated => &[],
534		}
535	}
536
537	#[must_use]
538	const fn buf_len(&self) -> usize {
539		match self {
540			Self::Stack(arr) => arr.len(),
541			Self::Heap(sz_bytes, _) => *sz_bytes / std::mem::size_of::<u16>(),
542			Self::Unallocated => 0,
543		}
544	}
545
546	#[must_use]
547	const fn is_allocated(&self) -> bool {
548		match self {
549			Self::Unallocated => false,
550			_ => true,
551		}
552	}
553
554	#[must_use]
555	fn to_string_checked(&self) -> Result<String, std::string::FromUtf16Error> {
556		match self {
557			Self::Unallocated => Ok(String::new()),
558			_ => String::from_utf16(
559				&self
560					.as_slice()
561					.into_iter()
562					.take_while(|ch| **ch != 0x0000) // remove all trailing zeros
563					.map(|ch| *ch)
564					.collect::<Vec<_>>(),
565			),
566		}
567	}
568}